In [16]:
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
In [17]:
df = pd.read_csv("Data.csv")
In [18]:
df
Out[18]:
Country Age Salary Purchased
0 France 44.0 72000.0 Yes
1 Spain 27.0 48000.0 Yes
2 NaN 30.0 54000.0 NaN
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
5 France 35.0 58000.0 Yes
6 Spain NaN 52000.0 No
7 France 48.0 79000.0 Yes
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
In [20]:
imputer = SimpleImputer(missing_values = np.nan, strategy = 'most_frequent')

imputer.fit(df.iloc[:,:].values)

df.iloc[:,:] = imputer.transform(df.iloc[:,:].values)
In [ ]:
df
Out[ ]:
array([['France', 44.0, 72000.0, 'Yes'],
       ['Spain', 27.0, 48000.0, 'Yes'],
       ['France', 30.0, 54000.0, 'Yes'],
       ['Spain', 38.0, 61000.0, 'No'],
       ['Germany', 40.0, 48000.0, 'Yes'],
       ['France', 35.0, 58000.0, 'Yes'],
       ['Spain', 27.0, 52000.0, 'No'],
       ['France', 48.0, 79000.0, 'Yes'],
       ['Germany', 50.0, 83000.0, 'No'],
       ['France', 37.0, 67000.0, 'Yes']], dtype=object)
In [ ]: